[XEN] Shadow: emulate a few extra instructions on PAE pagetable writes
author Tim Deegan <Tim.Deegan@xensource.com>
Fri, 1 Jun 2007 13:32:11 +0000 (14:32 +0100)
committer Tim Deegan <Tim.Deegan@xensource.com>
Fri, 1 Jun 2007 13:32:11 +0000 (14:32 +0100)
in the hope of catching the "other half" write without another enter/exit.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
xen/arch/x86/mm/shadow/common.c
xen/arch/x86/mm/shadow/multi.c
xen/arch/x86/mm/shadow/private.h
xen/include/asm-x86/domain.h
xen/include/asm-x86/perfc_defn.h

index a4598e4bc22d0bda562a0a2198704cb6a959ad3b..2690acbeb259a3f77fc55a43121b371a74c3f56a 100644 (file)
@@ -248,7 +248,7 @@ hvm_emulate_insn_fetch(enum x86_segment seg,
 {
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
-    unsigned int insn_off = offset - ctxt->regs->eip;
+    unsigned int insn_off = offset - sh_ctxt->insn_buf_eip;
 
     /* Fall back if requested bytes are not in the prefetch cache. */
     if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) )
@@ -450,6 +450,7 @@ struct x86_emulate_ops *shadow_init_emulation(
     }
 
     /* Attempt to prefetch whole instruction. */
+    sh_ctxt->insn_buf_eip = regs->eip;
     sh_ctxt->insn_buf_bytes =
         (!hvm_translate_linear_addr(
             x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
@@ -461,6 +462,35 @@ struct x86_emulate_ops *shadow_init_emulation(
     return &hvm_shadow_emulator_ops;
 }
 
+/* Prepare an already-initialized emulation context for emulating the next
+ * instruction: on HVM vcpus, refill the insn buffer if EIP has left it. */
+void shadow_continue_emulation(struct sh_emulate_ctxt *sh_ctxt, 
+                               struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    unsigned long addr, diff;
+
+    /* Segment bases are deliberately not refetched: we don't emulate
+     * writes to segment registers, so the cached values remain valid. */
+
+    if ( is_hvm_vcpu(v) )
+    {
+        diff = regs->eip - sh_ctxt->insn_buf_eip;
+        if ( diff > sh_ctxt->insn_buf_bytes )
+        {
+            /* EIP moved past the cache (or behind it; diff wraps): refill. */
+            sh_ctxt->insn_buf_bytes =
+                (!hvm_translate_linear_addr(
+                    x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
+                    hvm_access_insn_fetch, sh_ctxt, &addr) &&
+                 !hvm_copy_from_guest_virt(
+                     sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
+                ? sizeof(sh_ctxt->insn_buf) : 0;
+            sh_ctxt->insn_buf_eip = regs->eip;
+        }
+    }
+}
+
 /**************************************************************************/
 /* Code for "promoting" a guest page to the point where the shadow code is
  * willing to let it be treated as a guest page table.  This generally
index cfd4c70c210286561328ca969c6d90a04925df26..c5fd34a3ca9dd82ca07cbd2b50a9a26275cc8cac 100644 (file)
@@ -2871,6 +2871,20 @@ static int sh_page_fault(struct vcpu *v,
     if ( !shadow_mode_refcounts(d) || !guest_mode(regs) )
         goto not_a_shadow_fault;
 
+    /*
+     * We do not emulate user writes. Instead we use them as a hint that the
+     * page is no longer a page table. This behaviour differs from native, but
+     * it seems very unlikely that any OS grants user access to page tables.
+     */
+    if ( (regs->error_code & PFEC_user_mode) )
+    {
+        SHADOW_PRINTK("user-mode fault to PT, unshadowing mfn %#lx\n", 
+                      mfn_x(gmfn));
+        perfc_incr(shadow_fault_emulate_failed);
+        sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
+        goto done;
+    }
+
     if ( is_hvm_domain(d) )
     {
         /*
@@ -2897,14 +2911,7 @@ static int sh_page_fault(struct vcpu *v,
 
     emul_ops = shadow_init_emulation(&emul_ctxt, regs);
 
-    /*
-     * We do not emulate user writes. Instead we use them as a hint that the
-     * page is no longer a page table. This behaviour differs from native, but
-     * it seems very unlikely that any OS grants user access to page tables.
-     */
-    r = X86EMUL_UNHANDLEABLE;
-    if ( !(regs->error_code & PFEC_user_mode) )
-        r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
+    r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
 
     /*
      * NB. We do not unshadow on X86EMUL_EXCEPTION. It's not clear that it
@@ -2922,6 +2929,35 @@ static int sh_page_fault(struct vcpu *v,
         sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
     }
 
+#if GUEST_PAGING_LEVELS == 3 /* PAE guest */
+    if ( r == X86EMUL_OKAY ) {
+        int i;
+        /* Emulate up to four extra instructions in the hope of catching 
+         * the "second half" of a 64-bit pagetable write. */
+        for ( i = 0 ; i < 4 ; i++ )
+        {
+            shadow_continue_emulation(&emul_ctxt, regs);
+            v->arch.paging.last_write_was_pt = 0;
+            r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
+            if ( r == X86EMUL_OKAY )
+            {
+                if ( v->arch.paging.last_write_was_pt )
+                {
+                    perfc_incr(shadow_em_ex_pt);
+                    break; /* Don't emulate past the other half of the write */
+                }
+                else 
+                    perfc_incr(shadow_em_ex_non_pt);
+            }
+            else
+            {
+                perfc_incr(shadow_em_ex_fail);
+                break; /* Don't emulate again if we failed! */
+            }
+        }
+    }
+#endif /* PAE guest */
+
     /* Emulator has changed the user registers: write back */
     if ( is_hvm_domain(d) )
         hvm_load_cpu_guest_regs(v, regs);
@@ -3878,6 +3914,11 @@ static inline void * emulate_map_dest(struct vcpu *v,
     gfn_t gfn;
     mfn_t mfn;
 
+    /* We don't emulate user-mode writes to page tables */
+    if ( ring_3(sh_ctxt->ctxt.regs) ) 
+        return NULL;
+
+    /* Walk the guest pagetables */
     guest_walk_tables(v, vaddr, &gw, 1);
     flags = accumulate_guest_flags(v, &gw);
     gfn = guest_l1e_get_gfn(gw.eff_l1e);
@@ -3885,27 +3926,24 @@ static inline void * emulate_map_dest(struct vcpu *v,
     sh_audit_gw(v, &gw);
     unmap_walk(v, &gw);
 
-    if ( !(flags & _PAGE_PRESENT) )
-    {
-        errcode = 0;
+    errcode = PFEC_write_access;
+    if ( !(flags & _PAGE_PRESENT) ) 
         goto page_fault;
-    }
 
-    if ( !(flags & _PAGE_RW) ||
-         (!(flags & _PAGE_USER) && ring_3(sh_ctxt->ctxt.regs)) )
-    {
-        errcode = PFEC_page_present;
+    errcode |= PFEC_page_present;
+    if ( !(flags & _PAGE_RW) ) 
         goto page_fault;
-    }
 
-    if ( !mfn_valid(mfn) )
+    if ( mfn_valid(mfn) )
+    {
+        *mfnp = mfn;
+        v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
+        return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
+    }
+    else 
         return NULL;
 
-    *mfnp = mfn;
-    return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
-
  page_fault:
-    errcode |= PFEC_write_access;
     if ( is_hvm_vcpu(v) )
         hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
     else
index 70c7c34010200377c00595f7465d975c52e79d9b..52ea6d5d16d007a2aeea5ca8eab3e59988d19c41 100644 (file)
@@ -634,9 +634,10 @@ static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
 struct sh_emulate_ctxt {
     struct x86_emulate_ctxt ctxt;
 
-    /* [HVM] Cache of up to 15 bytes of instruction. */
-    uint8_t insn_buf[15];
+    /* [HVM] Cache of up to 31 bytes of instruction. */
+    uint8_t insn_buf[31];
     uint8_t insn_buf_bytes;
+    unsigned long insn_buf_eip;
 
     /* [HVM] Cache of segment registers already gathered for this emulation. */
     unsigned int valid_seg_regs;
@@ -645,6 +646,8 @@ struct sh_emulate_ctxt {
 
 struct x86_emulate_ops *shadow_init_emulation(
     struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
+void shadow_continue_emulation(
+    struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
 
 #endif /* _XEN_SHADOW_PRIVATE_H */
 
index 6bb849731933540967001406755d83fed7dfbbb9..30c7601d1a41972a43b31d402bfdc7da8882a46d 100644 (file)
@@ -171,6 +171,8 @@ struct paging_vcpu {
     struct paging_mode *mode;
     /* HVM guest: paging enabled (CR0.PG)?  */
     unsigned int translate_enabled:1;
+    /* HVM guest: last emulate was to a pagetable */
+    unsigned int last_write_was_pt:1;
 
     /* paging support extension */
     struct shadow_vcpu shadow;
index 053e897774d9bd9c40faaaafc047e95c9a0cf08a..56bb30331a6d13d9f70aecf543c2d996269f8525 100644 (file)
@@ -90,5 +90,8 @@ PERFCOUNTER(shadow_guest_walk,     "shadow walks guest tables")
 PERFCOUNTER(shadow_invlpg,         "shadow emulates invlpg")
 PERFCOUNTER(shadow_invlpg_fault,   "shadow invlpg faults")
 
+PERFCOUNTER(shadow_em_ex_pt,       "shadow extra pt write")
+PERFCOUNTER(shadow_em_ex_non_pt,   "shadow extra non-pt-write op")
+PERFCOUNTER(shadow_em_ex_fail,     "shadow extra emulation failed")
 
 /*#endif*/ /* __XEN_PERFC_DEFN_H__ */